

// Move to the replication-package folder; the data paths used below are relative to it.
cd "../replication-package"


/*

  This do-file cleans the percent-female-legislators variable (pctfemaleleg)
  and builds the corresponding male share (pct_maleleg).

  Source: compiled by the Correlates of State Policy Project (CSPP) from
  Center for American Women and Politics (CAWP) data.

  The data I use are from https://cspp.ippsr.msu.edu/cspp/
  (version 2.6, saved as "cspp_data_2023-04-18.csv"). To reproduce the download:
  1) select all years and all variable categories
  2) in the "Related To" search box, search for "pctfemaleleg"
  3) check that the variable is correctly selected, using the "Variable matches" column
  4) download the data from the "Download Search Result" column

  For the full data set, see
  http://ippsr.msu.edu/public-policy/correlates-state-policy
  (full data "correlates2-6.csv" downloaded on 2023.04.18)

*/


// Load the CSPP extract; the first row of the CSV holds the variable names.
import delimited using "data/raw/cspp_data_2023-04-18.csv", clear varnames(1)

// Sample restrictions: exclude the District of Columbia and keep 1970-2020.
keep if st != "DC"
drop if !inrange(year, 1970, 2020)
sort st year

// pctfemaleleg is read as a string because missing values are coded "NA".
// Blank those out first so that -destring- can convert the column to numeric.
replace pctfemaleleg = "" if pctfemaleleg == "NA"
destring pctfemaleleg, replace

// Rescale by 0.01 so the variable is a 0-1 share (it is subtracted from 1 below).
replace pctfemaleleg = 0.01 * pctfemaleleg

// Male share of legislators = complement of the female share (0-1 scale).
gen pct_maleleg = 1 - pctfemaleleg

// Diagnostic: show which state-years have no value.
// -capture noisily- keeps the do-file running when nothing is missing
// (-tabulate- exits with r(2000) "no observations" on an empty sample).
capture noisily tab state year if missing(pct_maleleg)

// Replace an isolated missing year with the mean of the two adjacent years.
//  * (year) in the by-prefix fixes the within-state order, so [_n-1] and
//    [_n+1] are guaranteed to be the chronologically neighbouring rows.
//  * The year checks ensure the neighbours really are year-1 and year+1,
//    so a gap in the panel is never bridged by non-adjacent years.
//  * Both neighbours must be observed; runs of consecutive missing years
//    and the first/last year of each state are therefore left missing.
bysort state (year): replace pct_maleleg = (pct_maleleg[_n-1] + pct_maleleg[_n+1])/2 ///
				if missing(pct_maleleg) & !missing(pct_maleleg[_n-1], pct_maleleg[_n+1]) ///
				& year[_n-1]==year-1 & year[_n+1]==year+1

// Diagnostic: what is still missing after interpolation.
capture noisily tab state year if missing(pct_maleleg) // 2020 missing; use 2019 to fill in

// Carry each state's 2019 value forward into 2020.
//  * Only rows where 2020 is actually missing are touched, so an observed
//    2020 value is never overwritten (or blanked out when 2019 is missing).
//  * (year) fixes the within-state order and the year[_n-1]==2019 check
//    makes sure the preceding row really is that state's 2019 observation.
bysort state_fips (year): replace pct_maleleg = pct_maleleg[_n-1] ///
		if year==2020 & year[_n-1]==2019 & missing(pct_maleleg)

// Restore the state-abbreviation/year ordering used in the rest of the file.
sort st year

// Numeric state identifier: a copy of the FIPS code, value-labelled with the
// two-letter postal abbreviation (entries listed in ascending FIPS order).
generate state_a = state_fips
label define state_a  1 "AL"  2 "AK"  4 "AZ"  5 "AR"  6 "CA"  8 "CO"  9 "CT" 10 "DE" 11 "DC" 12 "FL", replace
label define state_a 13 "GA" 15 "HI" 16 "ID" 17 "IL" 18 "IN" 19 "IA" 20 "KS" 21 "KY" 22 "LA" 23 "ME", add
label define state_a 24 "MD" 25 "MA" 26 "MI" 27 "MN" 28 "MS" 29 "MO" 30 "MT" 31 "NE" 32 "NV" 33 "NH", add
label define state_a 34 "NJ" 35 "NM" 36 "NY" 37 "NC" 38 "ND" 39 "OH" 40 "OK" 41 "OR" 42 "PA" 44 "RI", add
label define state_a 45 "SC" 46 "SD" 47 "TN" 48 "TX" 49 "UT" 50 "VT" 51 "VA" 53 "WA" 54 "WV" 55 "WI" 56 "WY", add
label values state_a state_a

// Final sample window (1982-2020) and output variables.
keep if inrange(year, 1982, 2020)
keep year state_a pct_maleleg

// Write the cleaned state-year file.
save "data/state/05_pct_maleleg_CAWP.dta", replace 

